/* START 1_genLocPairs.do */

/* This script merges subzone information into the .dta farecard data (2015-2016 extracts) and saves the filtered result. */
/* session setup: never pause for -more-, and start with no data in memory
   (captured so that a failure of -clear- does not stop the script) */
set more off
capture clear

/* PATHS: run the project configuration quietly */
quietly do "code/config.do"

/* load accessory functions used further down in this script */
quietly do "code/99_accessoryFunctions.do"

/* Load the farecard extract first.
   -use- discards the data in memory with the -clear- option; -replace- is
   an option of -save-, not of -use-. */
/* use "${data_local}/1dec2015_10dec2015.dta", clear */
use "21feb2016_1mar2016.dta", clear

/* show card numbers in full rather than in abbreviated/scientific form */
format CRD_NUM %18.0g

/* eliminate implausibly long commutes (> 2 hours long: SBS Transit)
and invalid commute times (but keep the origin locations) */

/* A record without a destination ID has no usable end time: blank END so
   that t is missing and the record (with its origin) survives the filter. */
replace END = . if JRNY_DEST_ID_NUM == .

/* trip duration in minutes (END and START are millisecond datetimes) */
gen t = minutes(END - START)

/* keep trips lasting (0, 120] minutes, plus records with no computable
   duration; parentheses make the & / | grouping explicit */
keep if (t > 0 & t <= 120) | t == .

// eliminate cash payers and transit staff (< 3% of sample)
drop if CRD_NUM == 9999 | TKT_TYP_CD <= 0 | ///
  PATRON_CATG_ID_NUM == . | PATRON_CATG_ID_NUM == 0 /* | JRNY_DEST_ID_NUM == . */

// restrict to the last complete week (Mon 22 Feb - Sun 28 Feb 2016) in our data set.
// Half-open interval [22feb 00:00:00, 29feb 00:00:00): a lower bound of
// "> 21feb 23:59:59" would also admit sub-second timestamps from Sunday 21 Feb.
// tc() takes an unquoted datetime literal.
keep if START >= tc(22feb2016 00:00:00) & START < tc(29feb2016 00:00:00)

/* Check that both stop-to-subzone CSV files (bus stops and MRT stations)
   exist; if either one is missing, call linkTransitStops.
   NOTE(review): linkTransitStops is not defined in this file; presumably
   it comes from 99_accessoryFunctions.do and regenerates the CSVs. */
cap confirm file "make_data/bus_stop_subzones.csv"
local busIsHere = _rc   /* nonzero return code = bus file not found */
cap confirm file "make_data/mrt_station_subzones.csv"
if _rc != 0 | `busIsHere' != 0 { 
	linkTransitStops  /* DEPENDS ON PYTHON */
	}

/* -macro drop- addresses local macros by their underscore-prefixed name;
   without the underscore it looks for a global called busIsHere and the
   local is left in place. */
cap macro drop _busIsHere

/* merge subzones for trip origins, then for trip destinations.
   NOTE(review): mergeSubzones is not defined in this file (presumably in
   99_accessoryFunctions.do); the code below relies on it having created
   orig_area, orig_pl, dest_area and dest_pl. */
mergeSubzones orig
mergeSubzones dest

/* final dropping */
drop if orig_area == ""
drop if dest_area == "" & END != . /* keep even if they forgot to tap out */

/* ascending sort by card, then by trip start time */
gsort CRD_NUM START

/* -order- takes a bare varlist followed by options; the varlist must not
   be wrapped in parentheses */
order orig_area dest_area orig_pl dest_pl, after(t)

/* Replace the string subzone/planning-area names with labelled numeric
   variables. Each new variable is named by two initials: o/d for orig/dest
   followed by a/p for area/pl, giving oa, da, op, dp (created in that order).
   NOTE(review): each -encode- builds its own value label from the values
   present in that variable, so numeric codes are not guaranteed to agree
   between oa and da (or op and dp); compare via labels, not raw codes. */
foreach level in area pl {
	foreach side in orig dest {
		local source "`side'_`level'"
		local short = substr("`side'", 1, 1) + substr("`level'", 1, 1)
		encode `source', gen(`short')
		drop `source'
		}
	}

/* write out the cleaned data set */
save "make_data/lastWeek.dta", replace
